import requests
import re
import time
from multiprocessing import Pool

def get_res(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without one, requests waits on an unresponsive
            server with no limit, which would hang the caller.

    Returns:
        The response body decoded to ``str`` (``response.text``). The HTTP
        status code is not checked, so error pages are returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def get_info(info_url):
    """Download one listing page and extract a record for every post found.

    Args:
        info_url: URL of the page to download (via ``get_res``) and parse.

    Returns:
        A list of dicts, one per post, with the keys ``ida``, ``level``,
        ``sex``, ``content``, ``laugh`` and ``comment``. Every value is the
        matched text with surrounding whitespace stripped. The list is empty
        when any of the patterns matches nothing on the page.
    """
    info_res = get_res(info_url)

    # Raw strings so that \D and \d reach the regex engine as written instead
    # of being treated as (invalid) string-literal escapes.
    ids = re.findall(r'<h2>(.*?)</h2>', info_res, re.S)
    levels = re.findall(r'<div class="articleGender \D+Icon">(.*?)</div>', info_res, re.S)
    sexs = re.findall(r'<div class="articleGender (\D+)Icon">.*?</div>', info_res, re.S)
    contents = re.findall(r'<div class="content">.*?<span>(.*?)</span>.*?</div>', info_res, re.S)
    laughs = re.findall(r'<span class="stats-vote"><i class="number">(\d+)</i>', info_res, re.S)
    comments = re.findall(r'<i class="number">(\d+)</i> 评论', info_res, re.S)

    # NOTE(review): each field is matched independently over the whole page
    # and zip() stops at the shortest list, so if a post lacks one of the
    # elements the remaining fields pair up with the wrong post. Verify
    # against the live markup before relying on the pairing.
    return [
        {
            'ida': ida.strip(),
            'level': level.strip(),
            'sex': sex.strip(),
            'content': content.strip(),
            'laugh': laugh.strip(),
            'comment': comment.strip(),
        }
        for ida, level, sex, content, laugh, comment
        in zip(ids, levels, sexs, contents, laughs, comments)
    ]

if __name__ == '__main__':
    # Benchmark: scrape the same set of pages serially, then with a
    # 2-process pool, then with a 4-process pool, printing the elapsed
    # seconds for each run.
    url = 'https://www.qiushibaike.com/text/page/{}/'
    urls = [url.format(i) for i in range(0, 20)]

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start_1 = time.perf_counter()
    for url_single in urls:
        # Pass the formatted page URL, not the '{}' template.
        get_info(url_single)
    end_1 = time.perf_counter()
    print('串行爬虫:', end_1 - start_1)

    for label, process_count in (('两个进程:', 2), ('四个进程:', 4)):
        start = time.perf_counter()
        # The context manager shuts the worker processes down once the
        # blocking map() call has returned.
        with Pool(processes=process_count) as pool:
            pool.map(get_info, urls)
        end = time.perf_counter()
        print(label, end - start)